{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Python: Iterators, Generators and Comprehensions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Iterators\n", "\n", "An iterator is an object that implements the iterator protocol: calling `next` on it returns the next item, and it raises `StopIteration` when exhausted." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "A list is an iterable object but not an iterator." ] }, { "cell_type": "code", "execution_count": 60, "metadata": { "collapsed": true }, "outputs": [], "source": [ "xs = [1,2,3]" ] }, { "cell_type": "code", "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "'list' object is not an iterator\n" ] } ], "source": [ "try:\n", "    next(xs)\n", "except Exception as e:\n", "    print(e)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can make an iterator out of an iterable object by calling the `iter` function." ] }, { "cell_type": "code", "execution_count": 77, "metadata": {}, "outputs": [], "source": [ "xsi = iter(xs)" ] }, { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(xsi)" ] }, { "cell_type": "code", "execution_count": 79, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2" ] }, "execution_count": 79, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(xsi)" ] }, { "cell_type": "code", "execution_count": 80, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3" ] }, "execution_count": 80, "metadata": {}, "output_type": "execute_result" } ], "source": [ "next(xsi)" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'StopIteration'>\n" ] } ], "source": [ "try:\n", "    next(xsi)\n", "except Exception as e:\n", "    print(type(e))" ] }, { "cell_type": "markdown", "metadata": {},
"source": [ "### For loops\n", "\n", "The for loop automatically constructs an iterator from an iterable object, then repeatedly calls `next` on it until `StopIteration` is raised." ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n", "3\n" ] } ], "source": [ "for x in xs:\n", "    print(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Looping over collections" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Tuples and lists preserve order" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a\n", "b\n", "c\n" ] } ], "source": [ "for x in ('a', 'b', 'c'):\n", "    print(x)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "a\n", "b\n", "c\n" ] } ], "source": [ "for x in ['a', 'b', 'c']:\n", "    print(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sets do not preserve order" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b\n", "c\n", "a\n" ] } ], "source": [ "for x in set(['a', 'b', 'b', 'c']):\n", "    print(x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Dictionaries preserve order of entry (new in Python 3.6; guaranteed by the language since 3.7)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "c\n", "b\n", "a\n" ] } ], "source": [ "d = {'c': 1, 'b': 2, 'a': 3}\n", "for k in d:\n", "    print(k)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n", "3\n" ] } ], "source": [ "for k in d.values():\n", "    print(k)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type":
"stream", "text": [ "c 1\n", "b 2\n", "a 3\n" ] } ], "source": [ "for k, v in d.items():\n", " print(k, v)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Ranges" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0\n", "1\n", "2\n" ] } ], "source": [ "for i in range(3):\n", " print(i)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n", "3\n" ] } ], "source": [ "for i in range(1,4):\n", " print(i)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "3\n", "5\n" ] } ], "source": [ "for i in range(1,6,2):\n", " print(i)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Enumerate" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Traditional indexing" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": true }, "outputs": [], "source": [ "xs = ['a', 'b', 'c']" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 a\n", "1 b\n", "2 c\n" ] } ], "source": [ "for i in range(len(xs)):\n", " print(i, xs[i])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Standard Python idiom is to use `enumerate`" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 a\n", "1 b\n", "2 c\n" ] } ], "source": [ "for i, x in enumerate(xs):\n", " print(i, x)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "10 a\n", "11 b\n", "12 c\n" ] } ], "source": [ "for i, x in enumerate(xs, start=10):\n", " print(i, x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Zip" ] }, 
{ "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0, 'a'), (1, 'b'), (2, 'c')]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(zip(range(3), 'abc'))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 a\n", "1 b\n", "2 c\n" ] } ], "source": [ "for i, x in zip(range(3), 'abc'):\n", "    print(i, x)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Generators\n", "\n", "A generator is a function that produces a sequence of results instead of a single value. As generators do not store the sequence that is generated, they are memory-efficient. They use the `yield` and `yield from` keywords to produce values.\n", "\n", "Many built-in Python functions return lazy iterables (strictly speaking iterators or lazy sequences rather than generators, but they behave similarly) to minimize use of memory - e.g. `range`, `zip`, `map`, `filter`, `open` - so you need to iterate over them, for example with a for loop, to obtain their values." ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zip(range(10), 'abcdefghij')" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 a\n", "1 b\n", "2 c\n", "3 d\n", "4 e\n", "5 f\n", "6 g\n", "7 h\n", "8 i\n", "9 j\n" ] } ], "source": [ "for i, c in zip(range(10), 'abcdefghij'):\n", "    print(i, c)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### You can also convert a generator to a list\n", "\n", "Make sure that you have enough memory to do the conversion as a list stores all its contents in memory."
] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" ] }, "execution_count": 59, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(range(10))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generator functions" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def updown(n):\n", "    \"\"\"Generator that goes up to n and down again.\"\"\"\n", "    for i in range(n):\n", "        yield i\n", "    for i in range(n, -1, -1):\n", "        yield i " ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0, " ] } ], "source": [ "for i in updown(5):\n", "    print(i, end=', ')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def updown2(n):\n", "    \"\"\"Alternative version using yield from.\"\"\"\n", "    yield from range(n)\n", "    yield from range(n, -1, -1)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0, 1, 2, 3, 4, 5, 4, 3, 2, 1, 0, " ] } ], "source": [ "for i in updown2(5):\n", "    print(i, end=', ')" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def fib(n):\n", "    \"\"\"Yield the first n Fibonacci numbers (1, 1, 2, 3, 5, ...).\"\"\"\n", "    a, b = 1, 1\n", "    for i in range(n):\n", "        yield a\n", "        a, b = b, a + b" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1, 1, 2, 3, 5, 8, 13, 21, 34, 55, " ] } ], "source": [ "for i in fib(10):\n", "    print(i, end=', ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Generator expressions\n", "\n", "Syntax sugar for simple generators" ] }, { "cell_type":
"code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def odd(n):\n", "    \"\"\"Traditional generator.\"\"\"\n", "    for i in range(n):\n", "        if i %2 == 1:\n", "            yield i" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1, 3, 5, 7, 9, " ] } ], "source": [ "for i in odd(10):\n", "    print(i, end=', ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Using a generator expression" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1, 3, 5, 7, 9, " ] } ], "source": [ "gs = (i for i in range(10) if i%2 == 1)\n", "for i in gs:\n", "    print(i, end=', ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Comprehensions\n", "\n", "Syntax sugar similar to that of generator expressions can be used to create lists, sets and dictionaries." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### List comprehension" ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[i for i in range(10)]" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[1, 9, 25, 49, 81]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[i**2 for i in range(10) if i % 2 == 1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Set comprehension" ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0, 1, 2, 3, 4, 5}" ] }, "execution_count": 94, "metadata": {}, "output_type": "execute_result" } ], "source": [ "{i for i in updown(5)}" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ {
"data": { "text/plain": [ "{0, 1, 8, 27, 64, 125}" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [ "{i**3 for i in updown(5)}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Dictionary comprehension" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "subjects = ['ann', 'bob', 'charles']\n", "ages = [23, 34, 45]" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ann': 23, 'bob': 34, 'charles': 45}" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "{subject: age for subject, age in zip(subjects, ages)}" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }